
import pandas as pd 
import numpy as np
import time
import cx_Oracle
from sqlalchemy import create_engine


# 1. Feature construction and data cleaning
def feature_engineing(df, num1=0, num2='N'):
    """Index the frame by ``rating_id``, fill missing values and build combined columns.

    The input frame is not modified: ``set_index`` returns a new frame and
    all further work is done on that result.

    Steps, in order:

    1. ``rating_id`` becomes the index.
    2. Missing values in a fixed list of columns are replaced with ``num2``.
    3. Several columns are rewritten (or created) as comma-joined strings of
       the ``str``-cast source columns, e.g. ``hj_cflx`` becomes
       ``"<hj_cflx>,<hj_cfje>"``.
    4. Missing values in ``bzdjzjf``, ``jscxlxqy`` and ``fmyq_yqcs`` are
       replaced with ``num1``.

    Args:
        df: DataFrame containing ``rating_id`` and every column referenced
            below.
        num1: Fill value for the three columns of step 4 (default ``0``).
        num2: Fill value for the columns of step 2 (default ``'N'``).

    Returns:
        A new DataFrame indexed by ``rating_id`` with the changes applied.

    Raises:
        KeyError: If a referenced column is absent from ``df``.
    """
    df = df.set_index(['rating_id'])

    num2_fill_cols = [
        'thd', 'lsdk', 'lszq', 'lsgc', 'zdpwqy', 'sthjyzsx', 'sbwf', 'cpjf',
        'gqdj', 'gqcz', 'zzydxgj', 'swzdwfhmd', 'zcdybgdbyz',
        'flbzxr_lrsxbs', 'jyfwbg',
    ]
    # Assign the filled Series back instead of calling
    # ``df[col].fillna(..., inplace=True)``: the chained in-place form acts on
    # a temporary object under pandas Copy-on-Write and leaves ``df`` untouched.
    for col in num2_fill_cols:
        df[col] = df[col].fillna(num2)

    # target column -> source columns joined with ',' after casting to str.
    # Evaluated in this order; no entry reads a column that an earlier entry
    # has already overwritten, apart from its own target.
    joined_cols = {
        'qytxsp': ('qytxsp', 'ml'),
        'hjxypj': ('hjxypjdj', 'hjxypj_hyjz', 'ml'),
        'hj_cflx': ('hj_cflx', 'hj_cfje'),
        'rs_cflx': ('rs_cflx', 'rs_cfje'),
        'swcf_cflx': ('swcf_cflx', 'swcf_cfje'),
        'xzcf_cflx': ('xzcf_cflx', 'xzcf_cfje'),
        'aqsc_cflx': ('aqsc_cflx', 'aqsc_cfje'),
        'syhdwj': ('syhdwjs', 'hygxd'),
    }
    for target, sources in joined_cols.items():
        combined = df[sources[0]].astype('str')
        for name in sources[1:]:
            combined = combined + ',' + df[name].astype('str')
        df[target] = combined

    # Original notes: missing unfair-dispute values, technical-innovation
    # values and public-opinion counts may all be filled with 0 (``num1``).
    for col in ('bzdjzjf', 'jscxlxqy', 'fmyq_yqcs'):
        df[col] = df[col].fillna(num1)

    return df
